########################################################
### Script to Alamos Concha, Priscilla 2018:
### Large-Scale contentious politics 
########################################################


## The R code in this file tests alternative anchors for calibration

# For replication purposes of YOUTH BULGE 2010, see (United Nations-DESA-Population
# Division, 2010; Laremont, 2013; Brownlee et al., 2015; Maarten et al., 2014)
# https://esa.un.org/unpd/wpp/Download/Standard/Population/

## The R code was written by using the packages QCA 3.1, 3.3 and SetMethods 2.3, 2.4


######################################################
### Double check and test alternative calibrations ###
######################################################


# Run this script in a fresh R session. The workspace is deliberately not
# wiped here (rm(list = ls()) would delete the user's own objects), and the
# working directory is left untouched.

# Folder that holds the raw data for the robustness checks; adjust this path
# to the local copy of the replication material.
data_dir <- "D:/UCLouvain/PhD Thesis/PhD/2018/Dataset/Robustness"

# Read the raw data; the first CSV column supplies the row names that are
# printed as case labels further below.
Don <- read.csv(file.path(data_dir, "Raw data cal_rob.csv"), row.names = 1)


# Load packages. They must be installed beforehand, e.g. with
# install.packages(<name>, dependencies = TRUE).
# The attach order is kept as in the original script because it determines
# which package masks which function.
library(QCA)
library(QCAGUI)
library(SetMethods)
library(lattice)
library(arm)
library(plyr)
library(car)
library(stringr)
library(xtable)
library(betareg)
library(gmodels)
library(Hmisc)
library(MASS)
library(memisc)
library(polycor)
library(psych)
library(reshape)
library(VIM)
library(XML)
library(foreign)
library(directlabels)

# we test only fuzzy sets, that is
# YOUTH

# our anchors so far are (fully out/cross over/fully in):
# 10,16,35,    # YOUTH bulge

#	the raw scores look like this:
summary(Don$YOUTH)
# NOTE(review): the note that used to follow this summary described PHYSREP
# (a maximum of 6 out of 8) and appears to have been copied from another
# script; read the range of YOUTH from the summary above instead.

# Baseline anchors for the YOUTH bulge (fully out / cross-over / fully in).
th_1 <- c(10, 16, 35)


# visualization of calibration: see script on data preparation

# Conditions to calibrate and their anchors: one row per condition, columns
# named after the threshold labels used by calibrate().
conds <- c("YOUTH")
conds_anchors <- matrix(th_1, ncol = 3, byrow = TRUE,
                        dimnames = list(conds, c("e", "c", "i")))

# Add one calibrated fuzzy set per condition, named after the raw column
# with a "HIGH" prefix (YOUTH -> HIGHYOUTH). The column is appended by name
# rather than written to a fixed position, so this step does not depend on
# how many columns the raw data has.
for (cond in conds) {
  Don[[paste0("HIGH", cond)]] <- calibrate(
    Don[[cond]],
    type = "fuzzy", logistic = TRUE, idm = 0.95,
    thresholds = conds_anchors[cond, ]  # named vector: e, c, i
  )
}


#	now, let's try alternative calibrations 
# Alternative anchors (fully out / cross-over / fully in).
th_2 <- c(8, 14, 30)
th_3 <- c(7, 12, 28)

# Make new fuzzy variables from the same raw YOUTH scores. The anchors are
# labelled e / c / i explicitly, as in the baseline calibration, so their
# meaning does not depend on their position in the vector.
anchor_labels <- c("e", "c", "i")
Don$highyouth1 <- calibrate(Don$YOUTH, type = "fuzzy", logistic = TRUE,
                            thresholds = setNames(th_2, anchor_labels))
Don$highyouth2 <- calibrate(Don$YOUTH, type = "fuzzy", logistic = TRUE,
                            thresholds = setNames(th_3, anchor_labels))

#	compare the three calibrations:

# Three panels side by side, one per calibration.
par(mfrow = c(1, 3))

# Fuzzy-score column -> panel title.
calibration_titles <- c(
  HIGHYOUTH  = "Old Calibration YOUTH BULGE",
  highyouth1 = "1st alternative calibration YOUTH BULGE",
  highyouth2 = "2nd alternative calibration YOUTH BULGE"
)

# Scatter the raw YOUTH score against each fuzzy score and mark the 0.5
# cross-over with a grey horizontal line.
for (fuzzy_col in names(calibration_titles)) {
  plot(Don$YOUTH, Don[[fuzzy_col]], pch = 19, col = rgb(0, 0, 1, 0.5),
       main = calibration_titles[[fuzzy_col]],
       xlab = "Raw score",
       ylab = "Fuzzy score")
  abline(h = 0.5, col = rgb(.5, .5, .5, .5))
}

# Cases whose fuzzy score lies above the 0.5 cross-over under each
# calibration; which() skips missing scores.
rownames(Don)[which(Don$HIGHYOUTH > 0.5)]
rownames(Don)[which(Don$highyouth1 > 0.5)]
rownames(Don)[which(Don$highyouth2 > 0.5)]

#	now plot all alternatives with outcome 

# 2 x 2 panel layout; three of the four panels are used.
par(mfrow = c(2, 2))

# Each calibration of YOUTH (x) against the outcome LSCONTPOL (y), drawn as
# sufficiency plots (necessity = FALSE).
xy.plot(Don$HIGHYOUTH, Don$LSCONTPOL, necessity = FALSE,
        main = "Old calibration YOUTH",
        xlab = "YOUTH_old", ylab = "LSCONTPOL")
xy.plot(Don$highyouth1, Don$LSCONTPOL, necessity = FALSE,
        main = "1st alternative calibration YOUTH",
        xlab = "YOUTH_1", ylab = "LSCONTPOL")
xy.plot(Don$highyouth2, Don$LSCONTPOL, necessity = FALSE,
        main = "2nd alternative calibration YOUTH",
        xlab = "YOUTH_2", ylab = "LSCONTPOL")


# Parameters of fit for the sufficiency of a high youth bulge for the
# outcome LSCONTPOL, under each calibration.
# QCAfit() takes the condition as `x` and the outcome as `y`, the same
# order as in the xy.plot() calls; cond.lab labels the rows of the combined
# table.

nf1 <- QCAfit(Don$HIGHYOUTH,  Don$LSCONTPOL, cond.lab = "HIGHYOUTH",
              necessity = FALSE)
nf2 <- QCAfit(Don$highyouth1, Don$LSCONTPOL, cond.lab = "highyouth1",
              necessity = FALSE)
nf3 <- QCAfit(Don$highyouth2, Don$LSCONTPOL, cond.lab = "highyouth2",
              necessity = FALSE)

rbind(nf1, nf2, nf3)

# Use the first alternative anchor for the alternative QCA
# / outcome "LSCONTPOL".

# To prevent confusion, drop the uncalibrated columns and proximate
# conditions in a single step: positions 4 and 6 to 11 of the data frame as
# it stands at this point.
Don <- Don[-c(4, 6:11)]


##################################################################
##################################################################


### Analysis of necessity ###
#############################

# Columns 2 to 7 are tested as conditions; presumably these are the six sets
# named in the superSubset() call below -- verify against names(Don).
nec_conds <- Don[, 2:7]

# Necessity of each condition for the outcome LSCONTPOL.
QCAfit(nec_conds, Don$LSCONTPOL, cond.lab = names(nec_conds),
       necessity = TRUE)

# Necessity of each negated condition (1 - membership), labelled with "~".
QCAfit(1 - nec_conds, Don$LSCONTPOL,
       cond.lab = paste0("~", names(nec_conds)), necessity = TRUE)

# Search for necessary (combinations of) conditions above the inclusion and
# coverage cut-offs.
# NOTE(review): HIGHYOUTH, highyouth1 and highyouth2 are three calibrations
# of the same raw variable; combinations that mix them have no substantive
# meaning -- confirm that entering all three together is intended.
SUIN <- superSubset(Don, outcome = "LSCONTPOL",
                    conditions = c("NONHERED", "OILPOOR", "CORRUPT",
                                   "HIGHYOUTH", "highyouth1", "highyouth2"),
                    relation = "necessity",
                    incl.cut = 0.9,
                    cov.cut = 0.8)
SUIN



##############################################################################
##############################################################################

# Conclusion of the alternative calibrations for the Robustness Tests:
# No alternative anchors change the necessity values in a meaningful way
# (the new values have very low coverage and low relevance of necessity).
# The "old" anchors seem to represent the meaning of the sets well, although the
# condition remains far from qualifying as a necessary condition.
# The "old" anchors are therefore retained.

